import sqlite3
from sqlite3.dbapi2 import Cursor
from sqlite3 import Error
from pandas import DataFrame, read_json,to_datetime
import pandas as pd
from pathlib import Path
from itertools import combinations
import numpy as np
import datetime
import random
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
pd.options.plotting.backend = "plotly"
import plotly.offline as pyo
pyo.init_notebook_mode()
from scipy.spatial import distance_matrix
FILE = 'BTC_USDT-1m.json'
# The data folder is addressed relative to the grandparent of the working directory.
PATH = f"{Path('.').resolve().parent.parent}/user_data/data/binance/"
PATH + FILE  # notebook display of the full path

# Import data: read the JSON file and label its six columns.
data = read_json(PATH + FILE, orient='values')
data.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
# 'Date' is left out of the cast; it is converted from epoch milliseconds below.
data = data.astype(
    dtype=dict.fromkeys(('Open', 'High', 'Low', 'Close', 'Volume'), 'float')
)
data['Date'] = to_datetime(data['Date'], unit='ms')
data  # notebook display

# Min-max scale the closing price into a new 'Scale' column.
df_scale = data.copy()
minvalue = df_scale['Close'].min()
maxvalue = df_scale['Close'].max()
difmaxmin = maxvalue - minvalue
df_scale['Scale'] = df_scale['Close'].sub(minvalue).div(difmaxmin)
df_scale  # notebook display
df_scale  # notebook display

# Line chart of the scaled close over time.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_scale['Date'],
        y=df_scale['Scale'],
        mode='lines',
        name='BTC',
    )
)
fig.show()
# SQLite file name and its location: the grandparent of the working directory.
DATABASE = 'twitter_database.sqlite'
DATABASE_PATH = f"{Path('.').resolve().parent.parent}/{DATABASE}"
def create_connection(db_file):
    """Ensure the ``Prices`` table exists in the SQLite database at *db_file*.

    Opens the database, creates the ``Prices`` table if it is missing, prints
    the SQLite library version, and always closes the connection again.
    Despite its name, the function does not hand the connection back.

    Args:
        db_file: Path of the SQLite database file.

    Returns:
        None. ``sqlite3.Error`` failures are printed rather than raised.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        cursor = conn.cursor()
        cursor.execute(
            "CREATE TABLE if not exists Prices "
            "(id INTEGER PRIMARY KEY, symbol TEXT, price REAL, created_at TEXT)"
        )
        # `sqlite3.version` is deprecated and removed in recent Python versions;
        # `sqlite3.sqlite_version` (the SQLite library version) is always there.
        print(sqlite3.sqlite_version)
    except Error as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()
def get_prices(conn, currency):
    """Return all ``Prices`` rows for one symbol, ordered by ``id``.

    Args:
        conn: An open ``sqlite3`` connection.
        currency: Symbol to match against the ``symbol`` column.

    Returns:
        A list of row tuples as produced by ``cursor.fetchall()``.
    """
    cursor = conn.cursor()
    # Bind the symbol as a parameter instead of splicing it into the SQL text,
    # so quotes in `currency` cannot change the statement.
    cursor.execute("SELECT * FROM Prices WHERE symbol = ? ORDER BY id", (currency,))
    return cursor.fetchall()
# Load the stored prices for one symbol into a two-column frame
# (Created_at, Close).
conn = sqlite3.connect(DATABASE_PATH)
currency = "BTCUSDT"
results = get_prices(conn, currency)
# The price column is temporarily named after the symbol.
df = pd.DataFrame(results, columns=['Id', 'Symbol', currency, 'Created_at'])
df = df[["Created_at", currency]]
# Created_at values are compared as strings against this cut-off.
df = df[df['Created_at'] > '2022-08-05T00:00:00.000']
# Rename through `currency` (not a hard-coded "BTCUSDT") so the 'Close'
# column still appears when the symbol above is changed.
df = df.rename(columns={currency: "Close"})
df  # notebook display
| Created_at | Close | |
|---|---|---|
| 12676 | 2022-08-05T01:24:23.000Z | 22663.09 |
| 12677 | 2022-08-05T01:24:25.000Z | 22659.69 |
| 12678 | 2022-08-05T01:24:26.000Z | 22656.64 |
| 12679 | 2022-08-05T01:24:28.000Z | 22657.16 |
| 12680 | 2022-08-05T01:24:30.000Z | 22656.73 |
| ... | ... | ... |
| 89828 | 2022-08-06T08:58:01.000Z | 23153.72 |
| 89829 | 2022-08-06T08:58:02.000Z | 23153.73 |
| 89830 | 2022-08-06T08:58:03.000Z | 23152.86 |
| 89831 | 2022-08-06T08:58:05.000Z | 23153.83 |
| 89832 | 2022-08-06T08:58:06.000Z | 23152.22 |
77157 rows × 2 columns
# Work on a copy so `df` itself is not touched.
df_filter = df.copy()
# Convert the Created_at column to a NumPy datetime64 array.
s_array = df_filter["Created_at"].to_numpy(dtype='datetime64')
# Notebook display of the converted timestamps.
s_array
array(['2022-08-05T01:24:23.000', '2022-08-05T01:24:25.000',
'2022-08-05T01:24:26.000', ..., '2022-08-06T08:58:03.000',
'2022-08-06T08:58:05.000', '2022-08-06T08:58:06.000'],
dtype='datetime64[ms]')
# Line chart of the stored close prices against their timestamps.
df_fig = df.copy()
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_fig['Created_at'],
        y=df_fig['Close'],
        mode='lines',
        name='BTC',
    )
)
fig.show()
# Cut the close-price series into overlapping windows with giotto-tda.
from gtda.homology import VietorisRipsPersistence
from gtda.time_series import SlidingWindow
from numpy import array
# Each window holds 25 consecutive samples; consecutive windows start one sample apart.
window_size = 25
stride = 1
# Prices and their timestamps as NumPy arrays.
y = df['Close'].to_numpy()
x = df['Created_at'].to_numpy(dtype='datetime64')
# Positional index of every row in df.
X = np.arange(df.shape[0])
SW = SlidingWindow(size=window_size, stride=stride)
# X_sw holds the price windows; yr holds one resampled price per window.
# NOTE(review): later code treats yr[i] as the last price of window i - confirm
# against the SlidingWindow resampling rule.
X_sw, yr = SW.fit_transform_resample(y, y)
# Notebook display of the windows' shape.
X_sw.shape
#print(x)
#print(X_sw[0], yr[0])
#print(X_sw[1])
(77133, 25)
def get_change(current, previous):
    """Return the percentage change from *previous* to *current*.

    Args:
        current: The new value.
        previous: The reference value.

    Returns:
        ``0`` when both values are equal, ``float('inf')`` when *previous* is
        zero and *current* is not, otherwise
        ``(current - previous) / previous * 100.0``.
    """
    if current == previous:
        return 0
    # Test for zero explicitly: NumPy scalars do not raise ZeroDivisionError,
    # so an exception-based guard would be skipped for them.
    if previous == 0:
        return float('inf')
    return ((current - previous) / previous) * 100.0
# Window the positions 0..len(y)-1 with the same transformer, so each row of
# XX_sw lists the positions in y that make up one window.
XX_ = np.arange(len(y))
XX_sw, yyr = SW.fit_transform_resample(XX_, XX_)
print(f'Shape: {XX_sw.shape}')
# Notebook display of the index windows.
XX_sw
Shape: (77133, 25)
array([[ 0, 1, 2, ..., 22, 23, 24],
[ 1, 2, 3, ..., 23, 24, 25],
[ 2, 3, 4, ..., 24, 25, 26],
...,
[77130, 77131, 77132, ..., 77152, 77153, 77154],
[77131, 77132, 77133, ..., 77153, 77154, 77155],
[77132, 77133, 77134, ..., 77154, 77155, 77156]])
# Label each window by how far the price rises in the `future_values` samples
# that follow it. Windows without a full look-ahead range are skipped.
dif_valid_futures = []  # 1 when the rise exceeds porcentual_reference, else 0
dif_seconds = []        # time span of each labelled window, in seconds
differences = []        # percentage rise for each labelled window
difference_times = []   # timestamp of the last sample of each labelled window
future_values = 1000
windows_count = XX_sw.shape[0]
porcentual_reference = .5
series_length = len(y)
for i in np.arange(windows_count):
    first_index = XX_sw[i][0]
    last_index = XX_sw[i][window_size - 1]
    # The look-ahead range starts right after the window's last sample.
    start_index = last_index + 1
    end_index = start_index + future_values
    if end_index > series_length:
        continue
    diff_seconds = (x[last_index] - x[first_index]) / np.timedelta64(1, 's')
    # np.max runs in native code (builtin max iterates element by element)
    # and propagates NaN instead of depending on element order.
    max_future_value = np.max(y[start_index:end_index])
    # yr[i] is used as the window's reference (last) price.
    last_value_window = yr[i]
    difference = (max_future_value - last_value_window) / last_value_window * 100
    differences.append(difference)
    dif_valid_futures.append(1 if difference > porcentual_reference else 0)
    dif_seconds.append(diff_seconds)
    difference_times.append(x[last_index])
print(f' Average: {np.average(differences)}, Deviation: {np.std(differences)}, Max: {np.max(differences)}, Min: {np.min(differences)}')
fig = go.Figure()
# Plot each difference at the timestamp of its own window's last sample.
# Using every row's timestamp would give more x values than differences and
# shift the curve in time.
fig.add_trace(go.Scatter(x=np.array(difference_times), y=differences,
                         name='Differences'))
fig.show()
Average: 0.2523690414515434, Deviation: 0.25614744689893415, Max: 1.553885037738386, Min: -0.16454993605582818
# Summary statistics and line chart of the labelled windows' time spans (seconds).
print(f' Average: {np.average(dif_seconds)}, Deviation: {np.std(dif_seconds)}, Max: {np.max(dif_seconds)}, Min: {np.min(dif_seconds)}')
fig = go.Figure()
# Legend label corrected from the typo 'Differencev (seconds)'.
fig.add_trace(go.Scatter(x=np.arange(len(dif_seconds)), y=dif_seconds,
                         mode='lines',
                         name='Difference (seconds)'))
fig.show()
Average: 35.3631670891729, Deviation: 38.62567720105243, Max: 2021.0, Min: 32.0
#print(len(dif_seconds), type(dif_seconds))
def dif_check(d):
    """Return True when the time span *d* (in seconds) is at most 40."""
    return d <= 40


# Keep only the window spans that pass dif_check.
dif_seconds_filter = list(filter(dif_check, dif_seconds))
# All four statistics describe the filtered list; "Min" previously read the
# unfiltered dif_seconds.
print(f' Average: {np.average(dif_seconds_filter)}, Deviation: {np.std(dif_seconds_filter)}, Max: {np.max(dif_seconds_filter)}, Min: {np.min(dif_seconds_filter)}')
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.arange(len(dif_seconds_filter)), y=dif_seconds_filter,
                         mode='lines',
                         name='Difference (seconds)'))
fig.show()
Average: 34.1150078863308, Deviation: 1.163568573680349, Max: 40.0, Min: 32.0
# Flag every labelled window whose time span is at most 40 seconds and collect
# the row labels of the flagged windows.
dt_dif_seconds = pd.DataFrame(dif_seconds)
dt_dif_seconds['IsValidTimeWindow'] = dt_dif_seconds[0] <= 40
# Adds the old row labels as an 'index' column (shown by head() below).
# The extra reset_index(drop=True) call was removed: its result was discarded,
# so it had no effect.
dt_dif_seconds.reset_index(inplace=True)
print(dt_dif_seconds.head())
# The column is already boolean, so it can be used directly as the mask.
df_valid_windows = dt_dif_seconds[dt_dif_seconds['IsValidTimeWindow']]
df_valid_array = np.array(df_valid_windows.index)
df_valid_array  # notebook display
index 0 IsValidTimeWindow 0 0 36.0 True 1 1 35.0 True 2 2 35.0 True 3 3 35.0 True 4 4 34.0 True
array([ 0, 1, 2, ..., 76130, 76131, 76132])
# Gather the difference, label and price window of every position listed in
# df_valid_array, then report the sizes before and after the selection.
forlength = len(df_valid_array)
dif_valid, differences_valid, X_sw_valid = [], [], []
for i, index in enumerate(df_valid_array):
    differences_valid.append(differences[index])
    dif_valid.append(dif_valid_futures[index])
    X_sw_valid.append(X_sw[index])

print(f' X: {len(X_sw)}, Y: {len(dif_valid_futures)}, Diferencias: {len(differences)}')
print('')
print(f' X: {len(X_sw_valid)}, Y: {len(dif_valid)}, Diferencias: {len(differences_valid)}')
X: 77133, Y: 76133, Diferencias: 76133 X: 75447, Y: 75447, Diferencias: 75447
# Notebook display of the labels for all labelled windows.
dif_valid_futures
# Count how many selected windows carry each label value.
values, counts = np.unique(dif_valid, return_counts=True)
# Notebook display: number of selected windows, label values, and their counts.
len(dif_valid),values, counts,
(75447, array([0, 1]), array([65594, 9853]))
# Turn every selected price window into a cloud of 2-D points by sliding a
# window of size 2 (stride 1) over it.
X_sw = X_sw_valid
wdata = array(X_sw)
wwindow_size = 2
wstride = 1
ii = len(wdata)
# Build the transformer once: its parameters are the same for every window.
# Dedicated names are used so the outer price series `y` and the outer
# transformer `SW` are not overwritten.
inner_sw = SlidingWindow(size=wwindow_size, stride=wstride)
wwdata = []
for i in np.arange(ii):
    window_prices = wdata[i]
    wX_sw, wyr = inner_sw.fit_transform_resample(window_prices, window_prices)
    wwdata.append(wX_sw)
wwdata = array(wwdata)
wwdata.shape  # notebook display
# Next: plot a sample of the point clouds
(75447, 24, 2)
# Report the number of point clouds, their dimension, and the points per cloud
# (the message is printed in Spanish).
print(f"Son {wwdata.shape[0]} nubes de puntos en {wwdata.shape[2]} dimensiones, "
f"cada una con {wwdata.shape[1]} puntos.")
Son 75447 nubes de puntos en 2 dimensiones, cada una con 24 puntos.
# For a few randomly chosen windows, show the price window (left) next to its
# 2-D point cloud (right).
plots = 8
# random.choices samples with replacement, so a window may be drawn twice.
points_clouds_index = random.choices(np.arange(wwdata.shape[0]), k=plots)
for i in np.arange(plots):
    fig = make_subplots(rows=1, cols=2)
    y_ = X_sw[points_clouds_index[i]]
    # One x position per sample of this window. The x axis used to have one
    # entry per window of the whole data set, far longer than the plotted y.
    x_ = np.arange(len(y_))
    df_graph = pd.DataFrame(wwdata[points_clouds_index[i]], columns=['X', 'Y'])
    fig.add_trace(go.Scatter(x=x_, y=y_), row=1, col=1)
    fig.add_trace(go.Scatter(x=df_graph.X, y=df_graph.Y, mode='markers'), row=1, col=2)
    fig.show()
# Time series + point cloud
# Compute one Vietoris-Rips persistence diagram (homology dimensions 0 and 1)
# per point cloud and time the whole run.
ii = len(wdata)
wdiagrams = []
start_time = datetime.datetime.now()
for i in np.arange(ii):
    _wwdata = wwdata[i]
    # The largest pairwise distance in this cloud is used as the edge-length cap.
    d = distance_matrix(_wwdata, _wwdata)
    max_distance = np.max(d)
    # The transformer expects a collection of clouds: add a leading axis of length 1.
    _wwdata = _wwdata.reshape((1, _wwdata.shape[0], _wwdata.shape[1]))
    # Only one cloud is passed per call, so there is nothing to run in
    # parallel. n_jobs=None keeps the work in-process instead of dispatching a
    # single task to a worker pool on every iteration (expected to remove
    # most of the per-iteration overhead; the diagrams are unchanged).
    VR = VietorisRipsPersistence(homology_dimensions=[0, 1], max_edge_length=max_distance,
                                 reduced_homology=False, n_jobs=None)
    diagrams = VR.fit_transform(_wwdata)
    wdiagrams.append(diagrams[0])
end_time = datetime.datetime.now()
difference = end_time - start_time
duration_in_s = difference.total_seconds()
print('Duracion: ', duration_in_s , 'segundos')
Duracion: 26360.103794 segundos
# For a few randomly chosen windows (without repetition), show the persistence
# diagram next to the point cloud it was computed from.
from gtda.plotting import plot_diagram
from plotly.subplots import make_subplots
num_of_plots = 3
diagram_index = np.random.choice(len(wdata), num_of_plots, replace=False)
for i in range(num_of_plots):
    ii = diagram_index[i]
    XX = plot_diagram(wdiagrams[ii], plotly_params={
        'layout': {
            'title': {'text': ''},
            'grid_columns': 3,
            'width': 980,
            'plot_bgcolor': '#E5ECF6',
        },
        'traces': {
        },
    })
    XX.set_subplots(1, 2)
    # The scatter previously read df.X / df.Y, but df only has the columns
    # Created_at and Close. Plot the two coordinates of this window's point
    # cloud instead.
    point_cloud = wwdata[ii]
    XX.add_trace(go.Scatter(x=point_cloud[:, 0], y=point_cloud[:, 1], mode='markers'),
                 row=1, col=2)
    XX.show()
# Time series + homology
# Save the labels of the selected windows, one integer per line.
np.savetxt('result_2.csv', dif_valid, delimiter=",", fmt='%d')

from gtda.diagrams import BettiCurve

# Compute Betti curves for each persistence diagram, fitted on that diagram
# alone, and stack the first two curves of every diagram as consecutive rows.
BC = BettiCurve()
bettis = []
ii = len(wdiagrams)
for i, diagram in enumerate(wdiagrams):
    # The transformer expects a collection of diagrams: add a leading axis of length 1.
    _phdiagram = diagram.reshape((1,) + (diagram.shape[0], diagram.shape[1]))
    _data_bc = BC.fit_transform(_phdiagram)
    # presumably the curves for homology dimensions 0 and 1 - TODO confirm
    first_curve = _data_bc[0][0]
    second_curve = _data_bc[0][1]
    bettis.extend((first_curve, second_curve))

# Save all curves as integers, one curve per row.
np.savetxt('bettis_2.csv', bettis, delimiter=",", fmt='%d')